##############################################################
# Replication code ##########################
# "Legislative Responsiveness, Urban Growth, and Popular Mobilization" 
# Jérémie Langlois and Marwa Shalaby
##############################################################

## Load Package #############################################################
library(tidyverse) 
library(lubridate)
library(MASS)
library(lmtest)
library(sandwich)
library(stargazer)
library(sf)
library(cowplot)
library(here)
library(modelsummary)
library(viridis)
library(ggrepel)
library(scales)
library(corrplot)
library(knitr)
library(RColorBrewer)
library(ggrepel)
library(grid)

initialize_settings <- function() {
  knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE)
  Sys.setlocale("LC_ALL", "en_US.UTF-8")
}

## Paths ###################################################
base_folder <- here::here()

paths <- list(
  district_key   = file.path(base_folder, "districtIDkey.csv"),
  protests       = file.path(base_folder, "protests.csv"),
  district_info  = file.path(base_folder, "district_info.csv"),
  questions      = file.path(base_folder, "PQs.csv"),
  massacres      = file.path(base_folder, "algeriamassacres.csv"),
  nightlights    = file.path(base_folder, "algeria_nightlights_yearly.csv"),
  population     = file.path(base_folder, "adm1_population_statistics.csv"),
  shapefile      = file.path(base_folder, "DZA-ADM1-shapefile")
)

load_data <- function(file_path, quiet = TRUE) {
  if (!file.exists(file_path)) {
    stop(sprintf("File not found: %s", file_path))
  }
  read_csv(file_path, show_col_types = quiet)
}

## Process Data  ##########################
process_base_data <- function() {
  
  district_key <- load_data(paths$district_key) %>%
    dplyr::select(District_ID_new, ID_1)
  
  protests_data <- load_data(paths$protests) %>%
    arrange(District_ID_new, month_year) %>%
    group_by(District_ID_new) %>%
    mutate(
      daftar_lag2 = lag(count_obs),
      repression_lag2 = lag(repression)
    ) %>%
    ungroup()
  
  questions_data <- load_data(paths$questions) %>%
    arrange(District_ID_new, month_year) %>%
    group_by(District_ID_new) %>%
    mutate(
      across(c(questions, local), 
             list(lag2 = ~lag(., 1),
                  lag3 = ~lag(., 2),
                  lag6 = ~lag(., 5)))
    ) %>%
    ungroup()
  
  list(
    district_key = district_key,
    protests = protests_data,
    questions = questions_data
  )
}

create_analysis_dataset <- function(base_data) {
  analysis_data <- base_data$protests %>%
    mutate(month_year = as.Date(month_year)) %>%
    left_join(base_data$questions, by = c("District_ID_new", "month_year")) %>%
    left_join(load_data(paths$district_info), by = "District_ID_new") %>%
    mutate(
      seventh_leg = month_year < as.Date("2017-05-01"),
      year = year(month_year),
      FLN_prop_X = if_else(seventh_leg, FLN_prop_2012, FLN_prop_2017),
      islamist_prop_X = if_else(seventh_leg, islamist_prop_2012, islamist_prop_2017),
      voter_turnout_X = if_else(seventh_leg, turnout_rate_2012, real_turnout_rate),
      Hirak = ifelse(month_year > as.Date("2019-02-01") & 
                       month_year < as.Date("2020-03-01"), 1, 0)
    ) %>%
    dplyr::select(-matches("(FLN|islamist)_prop_20(12|17)"),
                  -matches("turnout_rate_20(12|17)"),
                  -valid_rate_2017)
  
  analysis_data <- analysis_data %>%
    left_join(
      load_data(paths$massacres) %>%
        dplyr::select(District_ID_new = gov, logmass10, massacres) %>%
        filter(!is.na(District_ID_new)) %>%
        distinct() %>%
        mutate(log_massacres = log10(massacres + 0.1)),
      by = "District_ID_new"
    )
  
  pop_data <- load_data(paths$population) %>%
    filter(year > 2013) %>%
    mutate(year = year + 1) %>%
    mutate(WilayaPop = round(total_adm1_pop, 0),
           UrbanPop = round(total_urban_pop, 0)
    ) %>% 
    dplyr::select(-total_adm1_pop, -total_urban_pop) %>% 
    left_join(base_data$district_key, by = "ID_1")
  
  lights_data <- load_data(paths$nightlights) %>%
    dplyr::select(NAME_1, mean, year)
  
  admin_data <- st_read(paths$shapefile, quiet = TRUE) %>%
    st_drop_geometry() %>%
    dplyr::select(NAME_1, ID_1) 
  
  analysis_data <- analysis_data %>%
    left_join(pop_data, by = c("District_ID_new", "year")) %>%
    left_join(admin_data, by = "ID_1") %>%
    left_join(lights_data, by = c("NAME_1", "year"))
  
  return(analysis_data)
}

prepare_model_data <- function(analysis_data) {
  model_data <- analysis_data %>%
    filter(year > 2014) %>%
    rename(UrbanGrowth = UrbanChange) %>% 
    mutate(
      Repression = replace_na(repression_lag2, 0),
      QuestionsLog = log(questions + 1),
      LocalLog = log(local + 1),
      QuestionsLogCentered = QuestionsLog - mean(QuestionsLog, na.rm = TRUE),
      LocalLogCentered = LocalLog - mean(LocalLog, na.rm = TRUE),
      PopulationLog = log(WilayaPop),
      across(c(QuestionsLog, LocalLog, UrbanGrowth), 
             list(Centered = ~scale(., scale = FALSE))),
      UrbanGrowthSQ = UrbanGrowth^2,
      UrbanGrowthSQCentered = scale(UrbanGrowthSQ, scale = FALSE),
      UrbanGrowthCentered = UrbanGrowth - mean(UrbanGrowth, na.rm = TRUE),
      NightlightCapita = round(mean / (WilayaPop/100000), 2),
      month = factor(month(month_year)),
      Hirak = as.integer(Hirak),
      covid = between(month_year, as.Date("2020-02-01"), as.Date("2020-07-01"))
    ) %>% 
    rename(
      Protests = count_obs,
      LagDV = daftar_lag2,
      Massacres = log_massacres
    ) %>% 
    mutate(
      VoterTurnout = voter_turnout_X,
      Islamist = islamist_prop_X,
      FLN = FLN_prop_X
    ) %>% 
    dplyr::select(-voter_turnout_X, 
                  -islamist_prop_X, 
                  -FLN_prop_X)
  
  return(model_data)
}

initialize_settings()

base_data <- process_base_data()
analysis_data <- create_analysis_dataset(base_data)
model_data <- prepare_model_data(analysis_data)

model_data <- model_data %>% 
  rename(code = District_ID_new)

pre_covid_data <- model_data %>% 
  filter(month_year < as.Date('2020-03-01'))

pre_hirak_data <- model_data %>% 
  filter(month_year < as.Date('2019-03-01'))


## Main Models ########

fit_model <- function(formula, data) {
  model <- glm.nb(formula, data = data, control = glm.control(maxit = 100))
  coef <- coeftest(model, vcov = function(x) vcovHC(x, type = "HC1", cluster = data$code))
  list(model = model, coef = coef)
}

models <- list(
  VV11 = fit_model(
    Protests ~ QuestionsLog + UrbanGrowth + UrbanShare + 
      PopulationLog + NightlightCapita + VoterTurnout + FLN + 
      LagDV + Hirak + factor(month) + program_territory, 
    model_data
  ),
  
  VV1 = fit_model(
    Protests ~ QuestionsLog + UrbanGrowth + UrbanGrowthSQ + 
      UrbanShare + PopulationLog + NightlightCapita + VoterTurnout + 
      FLN + LagDV + Hirak + month + program_territory, 
    model_data
  ),
  
  VV22 = fit_model(
    Protests ~ LocalLog + UrbanGrowth + UrbanShare + 
      PopulationLog + NightlightCapita + VoterTurnout + FLN + 
      LagDV + Hirak + program_territory + month, 
    model_data
  ),
  
  VV2 = fit_model(
    Protests ~ LocalLog + UrbanGrowth + UrbanGrowthSQ + 
      UrbanShare + PopulationLog + NightlightCapita + VoterTurnout + 
      FLN + LagDV + Hirak + program_territory + month, 
    model_data
  ),
  
  VV33 = fit_model(
    Protests ~ QuestionsLogCentered*UrbanGrowthCentered + 
      UrbanShare + PopulationLog + NightlightCapita + VoterTurnout + 
      FLN + LagDV + factor(program_territory) + Hirak + factor(month), 
    model_data
  ),
  
  VV3 = fit_model(
    Protests ~ QuestionsLogCentered*UrbanGrowthCentered + 
      UrbanGrowthSQCentered + UrbanShare + PopulationLog + 
      NightlightCapita + VoterTurnout + FLN + LagDV + 
      factor(program_territory) + Hirak + factor(month), 
    model_data
  ),
  
  VV44 = fit_model(
    Protests ~ LocalLogCentered*UrbanGrowthCentered + 
      UrbanShare + PopulationLog + NightlightCapita + VoterTurnout + 
      FLN + LagDV + factor(program_territory) + Hirak + factor(month), 
    model_data
  ),
  
  VV4 = fit_model(
    Protests ~ LocalLogCentered*UrbanGrowthCentered + 
      UrbanGrowthSQCentered + UrbanShare + PopulationLog + 
      NightlightCapita + VoterTurnout + FLN + LagDV + 
      factor(program_territory) + Hirak + factor(month), 
    model_data
  )
)

create_results_table <- function(models, output_file, format = "html") {
  stargazer(
    lapply(models, function(x) x$coef),
    type = format,
    out = output_file,
    title = "Main Results",
    star.cutoffs = c(0.1, 0.05, 0.01),
    single.row = FALSE,
    digits = 2,
    dep.var.labels = "Monthly Governorate Protest Count",
    omit = c("month", "program_territory"),
    add.lines = list(
      c("Month Fixed Effects", rep("X", length(models))),
      c("Region Fixed Effects", rep("X", length(models))),
      c("Observations", sapply(models, function(x) nobs(x$model))),
      c("AIC", formatC(sapply(models, function(x) AIC(x$model)), 
                       format = "f", big.mark = ",", digits = 3)),
      c("Log Likelihood", formatC(sapply(models, function(x) logLik(x$model)), 
                                  format = "f", big.mark = ",", digits = 2))
    )
  )
}

create_results_table(models, "main_models.html", "html")
create_results_table(models, "main_models.tex", "latex")

## Additional Models  ####################

fit_model_set <- function(data, model_prefix, additional_vars = "") {
  base_vars <- "UrbanShare + PopulationLog + NightlightCapita + VoterTurnout + FLN + LagDV"
  fe_vars <- "factor(month) + program_territory"
  
  formulas <- list(
    f11 = paste("Protests ~ QuestionsLog + UrbanGrowth +", base_vars, "+", 
                additional_vars, "+", fe_vars),
    f1 = paste("Protests ~ QuestionsLog + UrbanGrowth + UrbanGrowthSQ +", base_vars, "+", 
               additional_vars, "+", fe_vars),
    f22 = paste("Protests ~ LocalLog + UrbanGrowth +", base_vars, "+", 
                additional_vars, "+", fe_vars),
    f2 = paste("Protests ~ LocalLog + UrbanGrowth + UrbanGrowthSQ +", base_vars, "+", 
               additional_vars, "+", fe_vars),
    f33 = paste("Protests ~ QuestionsLogCentered*UrbanGrowthCentered +", base_vars, "+", 
                additional_vars, "+ factor(program_territory) + factor(month)"),
    f3 = paste("Protests ~ QuestionsLogCentered*UrbanGrowthCentered + UrbanGrowthSQCentered +", 
               base_vars, "+", additional_vars, "+ factor(program_territory) + factor(month)"),
    f44 = paste("Protests ~ LocalLogCentered*UrbanGrowthCentered +", base_vars, "+", 
                additional_vars, "+ factor(program_territory) + factor(month)"),
    f4 = paste("Protests ~ LocalLogCentered*UrbanGrowthCentered + UrbanGrowthSQCentered +", 
               base_vars, "+", additional_vars, "+ factor(program_territory) + factor(month)")
  )
  
  models <- lapply(seq_along(formulas), function(i) {
    model_name <- paste0(model_prefix, names(formulas)[i])
    fit_model(as.formula(formulas[[i]]), data)
  })
  
  names(models) <- paste0(model_prefix, names(formulas))
  return(models)
}

islamist_models <- fit_model_set(
  model_data, 
  "I", 
  "Hirak + Islamist"
)

repression_models <- fit_model_set(
  model_data, 
  "R", 
  "Hirak + Repression + Massacres"
)

covid_models <- fit_model_set(
  model_data, 
  "C", 
  "covid"
)

create_subset_analysis <- function(data, condition, prefix, additional_vars = "") {
  subset_data <- subset(data, eval(parse(text = condition)))
  fit_model_set(subset_data, prefix, additional_vars)
}

pre_covid_models <- create_subset_analysis(
  model_data,
  "month_year < as.Date('2020-03-01')",
  "SC",
  "Hirak"
)

pre_hirak_models <- create_subset_analysis(
  model_data,
  "month_year < as.Date('2019-03-01')",
  "H"
)

## Summary Statistics ###############
generate_model_summaries <- function(models, output_prefix) {
  aic_values <- sapply(models, function(x) AIC(x$model))
  loglik_values <- sapply(models, function(x) logLik(x$model))
  n_obs <- sapply(models, function(x) nobs(x$model))
  formatted_aic <- formatC(aic_values, format = "f", big.mark = ",", digits = 3)
  formatted_loglik <- formatC(loglik_values, format = "f", big.mark = ",", digits = 2)
  
  for (format in c("html", "latex")) {
    stargazer(
      lapply(models, function(x) x$coef),
      type = format,
      out = sprintf("%s_models.%s", output_prefix, format),
      title = sprintf("Model Results - %s", output_prefix),
      star.cutoffs = c(0.1, 0.05, 0.01),
      single.row = FALSE,
      digits = 2,
      notes.append = TRUE,
      dep.var.labels = "Monthly Governorate Protest Count",
      omit = c("month", "program_territory"),
      add.lines = list(
        c("Month Fixed Effects", rep("X", length(models))),
        c("Region Fixed Effects", rep("X", length(models))),
        c("Observations", n_obs),
        c("AIC", formatted_aic),
        c("Log Likelihood", formatted_loglik)
      )
    )
  }
}

model_sets <- list(
  islamist = islamist_models,
  repression = repression_models,
  covid = covid_models,
  pre_covid = pre_covid_models,
  pre_hirak = pre_hirak_models
)

for (name in names(model_sets)) {
  generate_model_summaries(model_sets[[name]], name)
}

generate_summary_statistics <- function(data) {
  dataset_name <- deparse(substitute(data))
  numeric_vars <- names(data)[sapply(data, is.numeric)]
  datasummary_skim(
    data[, numeric_vars],
    output = paste0(dataset_name, "_summary_statistics.tex"),
    fmt = 2
  )
  
  models_sumstats <- data %>%
    dplyr::select(FLN, Hirak, Islamist, LagDV, LocalLog, Massacres, NightlightCapita,
                  PopulationLog, Protests, QuestionsLog, Repression, UrbanGrowth,
                  UrbanGrowthSQ, UrbanPop, UrbanShare, VoterTurnout, WilayaPop, 
                  local, questions) %>%
    mutate(Hirak = as.numeric(as.character(Hirak))) %>% 
    dplyr::select(order(colnames(.))) %>% # Sort columns alphabetically
    summarise(across(everything(), list(
      Mean = ~mean(.x, na.rm = TRUE),
      SD = ~sd(.x, na.rm = TRUE),
      Min = ~min(.x, na.rm = TRUE),
      Median = ~median(.x, na.rm = TRUE),
      Max = ~max(.x, na.rm = TRUE)
    ))) %>%
    pivot_longer(cols = everything(),
                 names_to = c("Variable", ".value"),
                 names_sep = "_") %>%
    arrange(Variable) %>%
    mutate(across(where(is.numeric), ~ format(.x, scientific = FALSE))) %>% # Disable scientific notation
    knitr::kable(format = "latex", 
                 caption = "Summary Statistics",
                 booktabs = TRUE)
  
  writeLines(models_sumstats, paste0(dataset_name, "_models_summary_statistics.tex"))
}

generate_summary_statistics(model_data)
generate_summary_statistics(pre_hirak_data)
generate_summary_statistics(pre_covid_data)


## Correlation Plots ############
vars_for_corr <- c("QuestionsLog", "LocalLog", "UrbanGrowth", "UrbanGrowthSQ", 
                   "UrbanShare", "PopulationLog", "NightlightCapita", "VoterTurnout", 
                   "FLN", "Islamist", "Repression", "Massacres")

labels <- c("Questions (log)", "Local (log)", "UrbanGrowth", "UrbanGrowth^2", 
            "UrbanShare", "Population (log)", "Nightlight", "VoterTurnout", 
            "FLN", "Islamist", "Repression", "Massacres")

generate_corr_plot <- function(data, vars, display_labels, output_file, plot_title = NULL) {
  
  subset_data <- data[vars]
  
  corr_matrix <- cor(subset_data, use = "complete.obs")
  
  if (length(display_labels) == ncol(corr_matrix) && length(display_labels) == nrow(corr_matrix)) {
    colnames(corr_matrix) <- display_labels
    rownames(corr_matrix) <- display_labels
  } else {
    warning("Length of 'display_labels' does not match the number of variables in the correlation matrix. Default names will be used by corrplot.")
  }
  
  pdf(output_file, height = 8, width = 8)
  corrplot(
    corr_matrix,
    method = "color",
    type = "upper",
    order = "hclust", 
    addCoef.col = "black",
    tl.col = "black",
    tl.srt = 45,
    tl.cex = 0.9,
    number.cex = 0.8,
    col = colorRampPalette(RColorBrewer::brewer.pal(11, "PuOr"))(200), 
    diag = FALSE,
    mar = c(1, 1, 1, 1),
    main = plot_title 
  )
  dev.off()
  
}

generate_corr_plot(
  data = model_data, 
  vars = vars_for_corr, 
  display_labels = labels,
  output_file = "correlation_plot_main.pdf"
)

generate_corr_plot(
  data = pre_hirak_data, 
  vars = vars_for_corr, 
  display_labels = labels,
  output_file = "correlation_plot_pre_hirak.pdf"
)

generate_corr_plot(
  data = pre_covid_data, 
  vars = vars_for_corr, 
  display_labels = labels,
  output_file = "correlation_plot_pre_covid.pdf"
)


## Figure 1: Questions Fig  ####################################################
questions_monthly <- base_data$questions %>%
  group_by(month_year) %>%
  summarize(qcount = sum(questions)) %>%
  mutate(month_year = month_year %m-% months(1)) %>%
  filter(month_year > "2014-08-01")

# Define events
events <- data.frame(
  date = as.Date(c("2014-12-01", "2017-05-01", "2017-12-01", "2019-02-01",
                   "2019-04-01", "2020-04-01", "2021-03-01")),
  label = c("Ain Salah\nProtests", "2017 Legislative\nElections",
            "Teachers'\nStrikes Begin", "Hirak\nBegins",
            "Bouteflika\nResigns", "COVID-19\nLockdowns",
            "Parliament\nDissolved"),
  y_pos = c(-5, -5, -5, -5, -5, -5, -5),
  hjust = c(0.1, 0.5, 0.1, 0.9, 0.15, 0.7, 0.9)
)

questions_line <- ggplot(questions_monthly, aes(x = month_year, y = qcount)) +
  geom_line(color = "blue", size = 0.8) +
  
  geom_segment(data = events,
               aes(x = date, xend = date, y = 0, yend = 0.8 + y_pos), 
               size = 0.5) +
  geom_point(data = events,
             aes(x = date, y = -5),
             inherit.aes = FALSE,
             size = 1.6,                
             shape = 8)              +
  
  # arrow = arrow(type = "closed", length = unit(2.75, "mm"))) +
  geom_text(data = events,
            aes(x = date, y = y_pos - 1, label = label, hjust = hjust),
            vjust = 1.25, size = 2.5, lineheight = 0.9, family = "serif") +
  
  scale_x_date(breaks = seq.Date(as.Date("2015-01-01"), as.Date("2022-01-01"), by = "year"),
               labels = date_format("%Y"),
               limits = c(as.Date("2014-09-01"), as.Date("2021-06-01")),
               position = "top") +
  scale_y_continuous(breaks = seq(0, 100, 10), limits = c(-20, 100)) +
  
  theme_minimal() +
  theme(
    axis.text = element_text(size = 7, family = "serif", color = "black"),
    axis.title = element_blank(),
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_line(colour = "gray85", linetype = "dashed"),
    panel.grid.major.x = element_blank()
  ) +
  
  geom_hline(yintercept = 0, color = "black", size = 0.5) +
  geom_segment(aes(x = x, xend = x, y = 0, yend = 100),
               data = data.frame(x = as.Date(paste0(2015:2021, "-01-01"))),
               color = "gray70", size = 0.5, inherit.aes = FALSE)

ggsave("questions_line.pdf", questions_line, width = 14, height = 7, units = "cm", dpi = 1000)



## Figure 2: Map of Total Protests by Governorate  #############################

shapefile_data <- st_read(paths$shapefile, quiet = TRUE)
shapefile_data$ID_1 <- as.numeric(shapefile_data$ID_1)

centroids <- st_centroid(shapefile_data) %>% st_coordinates()
shapefile_data$X <- centroids[, "X"]
shapefile_data$Y <- centroids[, "Y"]

district_key <- load_data(paths$district_key) %>%
  dplyr::select(District_ID_new, ID_1)

protests_summary <- load_data(paths$protests) %>%
  arrange(District_ID_new, wilaya_eng)  %>% 
  left_join(district_key, by = "District_ID_new") %>% 
  group_by(ID_1, wilaya_eng) %>% 
  summarise(protests_count = sum(count_obs))

shapefile_data <- left_join(shapefile_data, protests_summary, by = "ID_1")
shapefile_data$district_name <- shapefile_data$wilaya_eng

define_district_groups <- function() {
  list(
    up = c("Alger", "Bedjaia"),
    northbynorthwest = c("Blida", "Mostaghanem"),
    northbynortheast = c("Constantine"),
    northeast = c("Skikda"),
    rightbynortheast = c("Annaba"),
    southbysoutheast = c("Khenchela"),
    leftbynorthwest = c("Oran", "Jijel"),
    left = c("Ain Timouchent"),
    leftextra = c("Tlemcen"),
    right = c("El Tarf", "Tebessa"),
    rightextra = c("Souk Ahras")
  )
}

create_district_label <- function(data, districts, nudge_x = 0, nudge_y = 0, size = 2.5, direction = NULL) {
  params <- list(
    mapping = aes(label = wilaya_eng, x = X, y = Y),
    data = subset(data, wilaya_eng %in% districts),
    size = size, fontface = "plain", color = "black", family = "serif",
    box.padding = 0.35, point.padding = 0.2,
    segment.color = "black", segment.size = 0.3,
    arrow = arrow(type = "closed", angle = 25, length = unit(0.07, "cm"))
  )
  if (!is.null(direction)) { params$direction <- direction }
  if (nudge_x != 0) { params$nudge_x <- nudge_x }
  if (nudge_y != 0) { params$nudge_y <- nudge_y }
  do.call(ggrepel::geom_text_repel, params)
}

theme_map <- function() {
  theme_void(base_family = "serif") %+replace%
    theme(
      legend.position = "right",
      legend.justification = "top",
      legend.text.position = "right",
      legend.box.spacing = unit(-2.15, "cm"),
      legend.title = element_text(size = 7),
      legend.text = element_text(size = 6),
      legend.box.margin = margin(3, 1, 1, 0, unit = "cm")
    )
}

numbered_districts <- tibble::tribble(
  ~district_name,   ~num,
  "Ain Defla", 1,
  "Bordj Bou Arreridj", 2,
  "Bouira", 3,
  "Boumerdes", 4,
  "Chlef", 5,
  "Guelma", 6,
  "Mascara",        7,
  "Medea", 8,
  "Mila", 9,
  "Oum El Bouaghi", 10,
  "Relizane", 11,
  "Saida",          12,
  "Setif",          13,
  "Sidi Bel Abbes", 14,
  "Tipaza", 15,
  "Tissemssilt",    16,
  "Tizi Ouzou", 17
)

districts_with_manual_labels <- unlist(define_district_groups())

shapefile_text_subset <- shapefile_data %>%
  mutate(
    wilaya_eng = ifelse(wilaya_eng %in% numbered_districts$district_name, "", wilaya_eng),
    district_name = ifelse(district_name %in% numbered_districts$district_name, "", district_name),
    X = ifelse(wilaya_eng == "Djelfa", X - 0.25, X),
    Y = ifelse(wilaya_eng == "Djelfa", Y + 0.25, Y),
    X = ifelse(wilaya_eng == "Laghouat", X + 0.025, X),
    Y = ifelse(wilaya_eng == "Laghouat", Y + 0.025, Y),
    Y = ifelse(wilaya_eng == "Biskra", Y + 0.2, Y),
    X = ifelse(wilaya_eng == "El Oued", X + 0.45, X),
    Y = ifelse(wilaya_eng == "El Oued", Y - 0.5, Y)
  )

legend_inset_plot <- ggplot(numbered_districts, aes(y = num, x = 0, label = paste(num, district_name))) +
  geom_text(hjust = 0, family = "serif", size = 2.5, lineheight = 3.75) +
  scale_y_reverse(expand = expansion(mult = 0.05)) +
  theme_void() +
  xlim(0, 1.4)

legend_grob <- ggplotGrob(legend_inset_plot)

legend_box <- grobTree(
  roundrectGrob(x = 0.5, y = 0.5,              
                width  = 1,  height = 1,       
                r = unit(0.05, "snpc"),        
                gp = gpar(fill = "#F7F7F5",      
                          col  = "grey20",    
                          lwd  = 0.5)),     
  legend_grob
)

map_plot <- ggplot(data = shapefile_data) +
  geom_sf(aes(fill = protests_count), color = "white", size = 0.4) +
  scale_fill_viridis_c(
    option = "C", direction = -1, end = .9, begin = .1,
    name = "Protests"
  ) +
  guides(fill = guide_colorbar(title.position = "top")) +
  
  create_district_label(shapefile_text_subset, define_district_groups()$up, nudge_y = 0.75, direction = "y") +
  create_district_label(shapefile_text_subset, define_district_groups()$left, nudge_x = -1, nudge_y = 0.25) +
  create_district_label(shapefile_text_subset, define_district_groups()$leftextra, nudge_x = -2, direction = "x") +
  create_district_label(shapefile_text_subset, define_district_groups()$northbynorthwest, nudge_x = -1.5, nudge_y = 0.5) +
  create_district_label(shapefile_text_subset, define_district_groups()$northeast, nudge_x = 1, nudge_y = 0.5) +
  create_district_label(shapefile_text_subset, define_district_groups()$northbynortheast, nudge_x = 0.05, nudge_y = 1.25) +
  create_district_label(shapefile_text_subset, define_district_groups()$rightbynortheast, nudge_x = 1.8, nudge_y = 0.35) +
  create_district_label(shapefile_text_subset, define_district_groups()$leftbynorthwest, nudge_y = 0.01, direction = "y") +
  create_district_label(shapefile_text_subset, define_district_groups()$southbysoutheast, nudge_x = 2.5, nudge_y = -0.45) +
  create_district_label(shapefile_text_subset, define_district_groups()$right, nudge_x = 1.3, direction = "x") +
  create_district_label(shapefile_text_subset, define_district_groups()$rightextra, nudge_x = 1.5, direction = "x") +

  geom_text(
    aes(label = district_name, x = X, y = Y),
    data = subset(shapefile_text_subset, !(district_name %in% districts_with_manual_labels)),
    size = 2.5, fontface = "plain", color = "black", family = "serif"
  ) +
  geom_text(
    data = dplyr::left_join(
      dplyr::filter(shapefile_data, district_name %in% numbered_districts$district_name),
      numbered_districts,
      by = "district_name"
    ),
    aes(x = X, y = Y, label = num),
    size = 2.5, fontface = "plain", family = "serif", color = "black"
  ) +
  
  annotation_custom(
    grob = legend_box,
    xmin = 11,
    xmax = 14.25,
    ymin = 25,
    ymax = 32
  ) +
  
  coord_sf(expand = FALSE, ylim = c(19, 38), xlim = c(-10, 14.5)) +
  theme_map()

ggsave("map_protests_total_algeria.pdf", map_plot,
       device = cairo_pdf, width = 18, height = 16, dpi=1000, units = "cm")

## Figure 3: Predictions  ####################################################

generate_prediction_data <- function(model_data, var_name, model, n_points = 100, 
                                     lag_dv = 0, hirak_value = 0, include_specific = TRUE) {
  is_logged <- grepl("Log$", var_name)
  
  if (is_logged && include_specific) {
    
    orig_var_name <- sub("Log$", "", var_name)
    
    specific_vals <- c(0, 1, 5, 10, 20, 50)
    
    if (orig_var_name %in% names(model_data)) {
      orig_max <- max(model_data[[orig_var_name]], na.rm = TRUE)
    } else {
      orig_max <- exp(max(model_data[[var_name]], na.rm = TRUE)) - 1
    }
    
    specific_vals <- specific_vals[specific_vals <= orig_max]
    
    if (orig_max > 50) {
      orig_seq1 <- seq(0, 5, length.out = 30)      
      orig_seq2 <- seq(5, 20, length.out = 30)     
      orig_seq3 <- seq(20, orig_max, length.out = 40) 
      orig_values <- unique(c(specific_vals, orig_seq1, orig_seq2, orig_seq3))
    } else {
      orig_values <- unique(c(
        specific_vals,
        seq(0, orig_max, length.out = n_points)
      ))
    }
    
    orig_values <- sort(orig_values)
    
    var_values <- log(orig_values + 1)
  } else if (var_name == "UrbanGrowth" && include_specific) {
    var_min <- min(model_data[[var_name]], na.rm = TRUE)
    var_max <- max(model_data[[var_name]], na.rm = TRUE)
    
    specific_vals <- seq(-8, 16, by = 4)  
    
    specific_vals <- specific_vals[specific_vals >= var_min & specific_vals <= var_max]
    
    var_values <- unique(c(
      specific_vals,
      seq(var_min, var_max, length.out = n_points)
    ))
    
    var_values <- sort(var_values)
    
    orig_values <- var_values
  } else {
    var_min <- min(model_data[[var_name]], na.rm = TRUE)
    var_max <- max(model_data[[var_name]], na.rm = TRUE)
    var_values <- seq(var_min, var_max, length.out = n_points)
    
    orig_values <- var_values
    
    if (is_logged) {
      orig_values <- exp(var_values) - 1
    }
  }
  
  n_values <- length(var_values)
  
  pred_data <- model_data %>%
    summarise(across(where(is.numeric), ~mean(.x, na.rm = TRUE))) %>%
    slice(rep(1, n_values))
  
  pred_data$month <- factor("1", levels = as.character(1:12))
  pred_data$program_territory <- factor(
    "Nord-Centre", 
    levels = c("Nord-Centre", "Grand Sud", "Hauts-Plateaux-Centre", 
               "Hauts-Plateaux-Est", "Hauts-Plateaux-Ouest", 
               "Nord-Est", "Nord-Ouest", "Sud-Est", "Sud-Ouest")
  )
  
  pred_data[[var_name]] <- var_values
  
  if (var_name == "UrbanGrowth") {
    pred_data$UrbanGrowthSQ <- var_values^2
    
    if ("UrbanGrowthCentered" %in% names(model$model)) {
      center_value <- mean(model_data$UrbanGrowth, na.rm = TRUE)
      pred_data$UrbanGrowthCentered <- var_values - center_value
    }
    
    if ("UrbanGrowthSQCentered" %in% names(model$model)) {
      center_value <- mean(model_data$UrbanGrowthSQ, na.rm = TRUE)
      pred_data$UrbanGrowthSQCentered <- var_values^2 - center_value
    }
  }
  
  if (is_logged && paste0(var_name, "Centered") %in% names(model$model)) {
    center_value <- mean(model_data[[var_name]], na.rm = TRUE)
    pred_data[[paste0(var_name, "Centered")]] <- var_values - center_value
  }
  
  pred_data$LagDV <- lag_dv
  pred_data$Hirak <- hirak_value
  
  pred_data$original_scale <- orig_values
  
  pred_data$is_logged <- is_logged
  pred_data$variable_name <- var_name
  
  return(pred_data)
}

get_predictions <- function(model, pred_data, cluster_data, level = 0.95) {
  
  fit <- predict(model, newdata = pred_data, type = "response")
  
  vcov_mat <- vcovHC(model, type = "HC1", cluster = cluster_data$code)
  
  X <- model.matrix(terms(model), data = pred_data)
  
  se <- sqrt(diag(X %*% vcov_mat %*% t(X)))
  
  crit_val <- qnorm((1 + level) / 2)
  
  data.frame(
    fit = fit,
    lower = fit - crit_val * se,
    upper = fit + crit_val * se
  )
}

create_prediction_plot <- function(pred_data, preds, x_label, y_max = NULL, 
                                   highlight_values = NULL) {
  plot_data <- cbind(pred_data, preds)
  
  is_logged <- pred_data$is_logged[1]
  
  if (is.null(highlight_values)) {
    if (is_logged) {
      highlight_values <- c(0, 1, 5, 10, 20)
      max_val <- max(plot_data$original_scale)
      highlight_values <- highlight_values[highlight_values <= max_val]
    } else if (grepl("Urban", x_label)) {
      min_val <- min(plot_data$original_scale)
      max_val <- max(plot_data$original_scale)
      highlight_values <- seq(-8, 16, by = 4)
      highlight_values <- highlight_values[highlight_values >= min_val & 
                                             highlight_values <= max_val]
    }
  }
  
  p <- ggplot(plot_data, aes(x = original_scale, y = fit, ymin = lower, ymax = upper)) +
    geom_ribbon(alpha = 0.2) +
    geom_line(linewidth = 1) +
    labs(
      x = x_label,
      y = "Predicted Protests"
    ) +
    theme_bw() + 
    theme(
      aspect.ratio = 1,  
      panel.grid.minor = element_blank(),
      axis.text = element_text(size = 7, color = "black", family = "serif"),
      axis.title = element_text(size = 7, color = "black", family = "serif")
    )
  
  if (!is.null(highlight_values)) {
    highlight_points <- lapply(highlight_values, function(val) {
      idx <- which.min(abs(plot_data$original_scale - val))
      plot_data[idx, ]
    })
    
    if (length(highlight_points) > 0) {
      highlight_df <- do.call(rbind, highlight_points)
      
      p <- p + 
        geom_point(data = highlight_df, 
                   aes(x = original_scale, y = fit),
                   size = 3, color = "darkblue", alpha = 0.7)
    }
  }
  
  if (is_logged) {
    p <- p + 
      scale_x_continuous(
        trans = "log1p",  
        breaks = highlight_values,
        labels = highlight_values
      )
  } else {
    p <- p + 
      scale_x_continuous(
        breaks = highlight_values,
        labels = highlight_values
      )
  }
  
  if (!is.null(y_max)) {
    p <- p + ylim(0, y_max)
  }
  
  return(p)
}

create_all_plots <- function(model_data, models) {
  configs <- list(
    list(
      var_name = "QuestionsLog",
      model_name = "VV1",
      x_label = "Monthly Parliamentary Questions",
      y_max = 4,
      highlight_values = c(0, 1, 5, 10, 20)
    ),
    
    list(
      var_name = "LocalLog",
      model_name = "VV2",
      x_label = "Monthly Local-Focused Parliamentary Questions",
      y_max = 4,
      highlight_values = c(0, 1, 5, 10, 20)
    ),
    
    list(
      var_name = "UrbanGrowth",
      model_name = "VV1",
      x_label = "Urban Growth",
      y_max = 8,
      highlight_values = seq(-8, 16, by = 4)
    )
  )
  
  plots <- lapply(configs, function(config) {
    
    model_obj <- models[[config$model_name]]$model
    
    pred_data <- generate_prediction_data(
      model_data = model_data,
      var_name = config$var_name,
      model = model_obj,
      lag_dv = 0,
      hirak_value = 0
    )
    
    preds <- get_predictions(
      model = model_obj,
      pred_data = pred_data,
      cluster_data = model_data
    )
    
    create_prediction_plot(
      pred_data = pred_data,
      preds = preds,
      x_label = config$x_label,
      y_max = config$y_max,
      highlight_values = config$highlight_values
    )
  })
  
  combined_plot <- cowplot::plot_grid(
    plots[[1]], plots[[2]], plots[[3]],
    ncol = 1, align = "hv", axis = "rlbt"
  )
  
  list(
    plots = plots,
    combined = combined_plot
  )
}

all_plots <- create_all_plots(model_data, models)

ggsave("effects_plot_questions_local_urbangrowth.pdf", all_plots$combined, 
       device = cairo_pdf, width = 9, height = 18, units = "cm", dpi = 1000)
